You are currently looking at version 1.0 of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the Jupyter Notebook FAQ course resource.
In [1]:
import pandas as pd
import numpy as np
In [13]:
# Simulate five independent coin flips. Each draw is a binomial sample with
# one trial and success probability 0.5, so every printed value is 0 or 1.
for i in range(5):
    coinflip = np.random.binomial(1, 0.5)
    print(coinflip)
In [18]:
# Fraction of successes observed in 1000 trials with success probability 0.5.
np.random.binomial(n=1000, p=0.5) / 1000
Out[18]:
In [14]:
# Per-trial event probability used for the draw below.
chance_of_tornado = 0.01 / 100
# Number of events observed across 100000 independent trials.
np.random.binomial(n=100000, p=chance_of_tornado)
Out[14]:
In [30]:
def count_back_to_back(events):
    """Count days on which an event occurs and also occurred the day before.

    Parameters
    ----------
    events : array-like
        Sequence of 0/1 indicators, one entry per day.

    Returns
    -------
    int
        Number of indices ``j >= 1`` with ``events[j] == 1`` and
        ``events[j - 1] == 1``. Sequences shorter than two entries yield 0.
    """
    events = np.asarray(events)
    # Pair every day with its predecessor. Slicing covers all adjacent pairs,
    # including the final day, which an index loop stopping at len - 1 skips.
    return int(np.count_nonzero((events[1:] == 1) & (events[:-1] == 1)))


# One Bernoulli draw per simulated day, with a 1% chance of an event each day.
chance_of_tornado = 0.01
tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)

two_days_in_a_row = count_back_to_back(tornado_events)

print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))
In [35]:
# Single draw from the uniform distribution with bounds 0 and 1.
np.random.uniform(low=0, high=1)
Out[35]:
In [38]:
# Single draw from a normal distribution centred at 0.75; the scale argument
# is left at the library default.
np.random.normal(loc=0.75)
Out[38]:
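Formula for the (population) standard deviation, where $N$ is the number of samples, $x_i$ is the $i$-th sample and $\overline{x}$ is the sample mean: $$\sqrt{\frac{1}{N} \sum_{i=1}^N (x_i - \overline{x})^2}$$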
In [39]:
# Draw 1000 samples centred at 0.75, then evaluate the standard deviation
# formula by hand: square root of the mean squared deviation from the mean.
distribution = np.random.normal(loc=0.75, size=1000)
deviations = distribution - distribution.mean()
np.sqrt((deviations ** 2).sum() / distribution.size)
Out[39]:
In [40]:
# Library standard deviation of the same sample, for comparison with the
# hand-written formula.
distribution.std()
Out[40]:
In [41]:
import scipy.stats as stats
# Kurtosis of the sample, using scipy's default settings.
stats.kurtosis(a=distribution)
Out[41]:
In [42]:
# Skewness of the sample, using scipy's default settings.
stats.skew(a=distribution)
Out[42]:
In [63]:
# Draw 10000 samples from a chi-squared distribution with 2 degrees of
# freedom. The df argument must agree with the variable name and with the
# '2 degrees of freedom' legend entry used when this sample is plotted.
chi_squared_df2 = np.random.chisquare(2, size=10000)
# Skewness of the df=2 sample.
stats.skew(chi_squared_df2)
Out[63]:
In [44]:
# Draw 10000 samples from a chi-squared distribution with 5 degrees of
# freedom and report the skewness of the sample.
chi_squared_df5 = np.random.chisquare(df=5, size=10000)
stats.skew(a=chi_squared_df5)
Out[44]:
In [64]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Overlay step-style histograms of both chi-squared samples on one set of
# axes, then place the legend in the upper-right corner.
samples = [chi_squared_df2, chi_squared_df5]
legend_labels = ['2 degrees of freedom', '5 degrees of freedom']
output = plt.hist(samples, bins=200, histtype='step', label=legend_labels)
plt.legend(loc='upper right')
Out[64]:
In [65]:
# Load the grades table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='grades.csv')
In [66]:
# Preview the first five rows of the table.
df.head(n=5)
Out[66]:
In [67]:
# Number of rows in the table.
df.shape[0]
Out[67]:
In [68]:
# Split the rows into two groups by comparing the assignment 1 submission
# value against a fixed cutoff.
# NOTE(review): the column is compared directly with a string literal;
# presumably it holds ISO-style timestamp strings so that this ordering is
# chronological - verify against grades.csv.
cutoff = '2015-12-31'
submitted = df['assignment1_submission']
early = df[submitted <= cutoff]
late = df[submitted > cutoff]
In [76]:
# Column means for the early group. numeric_only=True restricts the
# reduction to numeric columns; without it, pandas >= 2.0 raises TypeError
# when the frame contains string columns such as the submission column.
early.mean(numeric_only=True)
Out[76]:
In [70]:
# Column means for the late group. numeric_only=True restricts the
# reduction to numeric columns; without it, pandas >= 2.0 raises TypeError
# when the frame contains string columns such as the submission column.
late.mean(numeric_only=True)
Out[70]:
In [71]:
from scipy import stats
# IPython introspection: the trailing `?` shows the help text for
# stats.ttest_ind instead of calling it.
stats.ttest_ind?
In [72]:
# Independent two-sample t-test on assignment 1 grades: early vs. late group.
stats.ttest_ind(a=early['assignment1_grade'], b=late['assignment1_grade'])
Out[72]:
In [73]:
# Independent two-sample t-test on assignment 2 grades: early vs. late group.
stats.ttest_ind(a=early['assignment2_grade'], b=late['assignment2_grade'])
Out[73]:
In [74]:
# Independent two-sample t-test on assignment 3 grades: early vs. late group.
stats.ttest_ind(a=early['assignment3_grade'], b=late['assignment3_grade'])
Out[74]: